1 Prepare R environment

# Load libraries
library(RSocrata)
library(readr)
library(rgdal)
library(dplyr)
library(ggplot2)
library(mapproj)
library(RColorBrewer)
library(graphZoo)

# Custom functions
# Capitalize the first letter of every space-separated word in each string.
#
# Args:
#   s:      character vector of strings to capitalize.
#   strict: if TRUE, the rest of each word is lower-cased; if FALSE (default)
#           it is left as is.
#
# Returns:
#   A character vector the same length as `s`. Names are kept only when `s`
#   has names. Missing strings stay missing.
capwords <- function(s, strict = FALSE) {
  cap <- function(words) {
    # Keep missing values missing instead of pasting them into "NANA".
    if (anyNA(words)) {
      return(NA_character_)
    }
    rest <- substring(words, 2)
    if (strict) {
      rest <- tolower(rest)
    }
    paste0(toupper(substring(words, 1, 1)), rest, collapse = " ")
  }
  # vapply() keeps the result a character vector even for zero-length input,
  # where sapply() would return an empty list.
  vapply(strsplit(s, split = " "), cap, character(1),
         USE.NAMES = !is.null(names(s)))
}

# Color-blind friendly palette: the eight "Dark2" ColorBrewer colors,
# shared by the plots below
cbf <- brewer.pal(n = 8, name = "Dark2")

Back to top


2 Vehicular collisions in Manhattan (2014)

2.1 Load data

# Load collision data: reuse the cached CSV when it exists, otherwise query the
# Socrata endpoint for Manhattan's 2014 records and cache the result.
# NOTE(review): the first run keeps the data frame returned by read.socrata()
# while later runs use read_csv()'s parse of the cache, so column types may
# differ between runs -- confirm this does not affect the analysis.
collisions_file <- "../data/collisions2014.csv"
if (file.exists(collisions_file)) {
  collisions <- read_csv(collisions_file)
} else {
  collisions <- read.socrata("https://data.cityofnewyork.us/resource/h9gi-nx95.csv?borough=MANHATTAN&$where=date >= '2014-01-01' AND date <= '2014-12-31'")
  # write_csv() does not create missing directories, so make sure the cache
  # directory is there before writing.
  dir.create(dirname(collisions_file), showWarnings = FALSE, recursive = TRUE)
  write_csv(collisions, collisions_file)
}
## Warning: 663 problems parsing '../data/collisions2014.csv'. See
## problems(...) for more details.
# Load map data: download and unpack the borough boundaries shapefile on first
# use, then read it and reproject it to WGS84 longitude/latitude.
map_dir <- "../map"
if (!file.exists(file.path(map_dir, "nybb.shp"))) {
  # download.file() cannot write into a directory that does not exist yet.
  dir.create(map_dir, showWarnings = FALSE, recursive = TRUE)
  map_zip <- file.path(map_dir, "nybb_15a.zip")
  # mode = "wb": a zip archive must be written in binary mode to stay intact.
  download.file("http://www.nyc.gov/html/dcp/download/bytes/nybb_15a.zip",
                map_zip, mode = "wb")
  # junkpaths = TRUE drops the archive's internal folders so that nybb.shp
  # lands directly in map_dir.
  unzip(map_zip, exdir = map_dir, junkpaths = TRUE)
}

nyc_map <- spTransform(readOGR(file.path(map_dir, "nybb.shp"), layer = "nybb", verbose = FALSE),
                       CRS("+proj=longlat +datum=WGS84"))

Back to top


2.2 Daily collisions

# Number of collisions on each calendar day, tagged with its day of the week.
# The weekday comes from POSIXlt's numeric `wday` (0 = Sunday ... 6 = Saturday)
# instead of weekdays(): abbreviated weekday names depend on the session
# locale, and matching them against English levels yields NA elsewhere.
daily1 <- collisions %>%
  mutate(DATE = as.Date(DATE)) %>%
  mutate(DAY = factor(as.POSIXlt(DATE)$wday,
                      levels = c(1:6, 0),
                      labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))) %>%
  group_by(DAY, DATE) %>%
  summarize(n = n())

# Mean daily count per weekday; the constant `group` column lets geom_line()
# connect the seven means with a single line.
daily2 <- daily1 %>%
  group_by(DAY) %>%
  summarize(mean = mean(n)) %>%
  mutate(group = 1)

# Daily counts (jittered and colored by weekday) with the per-weekday means
# overlaid as large points joined by a line.
g <- ggplot() +
  geom_point(data = daily1,
             aes(x = DAY, y = n, color = DAY),
             position = position_jitter(width = .25)) +
  geom_point(data = daily2,
             aes(x = DAY, y = mean, group = group),
             size = 5) +
  geom_line(data = daily2,
            aes(x = DAY, y = mean, group = group)) +
  theme_graphzoo(base_size = 15) +
  # "none" replaces the deprecated `color = FALSE` way of hiding a legend.
  guides(color = "none") +
  scale_color_manual(values = cbf[1:7]) +
  xlab(NULL) + ylab(NULL)

# Title wording matches the hourly figure ("Manhattan's ... collision rate").
g <- addTitle(g, "Manhattan's daily vehicular collision rate (Jan-Dec 2014)", n.lines = 2)

g <- addBanner(g, font.size = 4, heights = c(1, 0.05 * 8 / 6),
               l.txt = "GRAPHZOO.TUMBLR.COM",
               r.txt = "SOURCE: DATA.CITYOFNEWYORK.US")

g

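Figure 1: Number of vehicular collisions per day in Manhattan, by day of the week (Jan-Dec 2014). Small colored points are individual days; large points joined by a line are the weekday means.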

Back to top


2.3 Hourly collisions

# Collisions per hour of day on each date. The hour is whatever remains of
# TIME once every ":digits" part has been removed.
# Note: a date-hour pair without any collision produces no row here, so such
# hours do not enter the means computed below.
hourly1 <- collisions %>%
  mutate(HOUR = gsub(":[0-9]+", "", TIME)) %>%
  mutate(HOUR = factor(as.numeric(HOUR))) %>%
  group_by(HOUR, DATE) %>%
  summarize(n = n())

# Mean count per hour of day; `group` is a constant so that geom_line() draws
# one line through all the means.
hourly2 <- hourly1 %>%
  group_by(HOUR) %>%
  summarize(mean = mean(n)) %>%
  mutate(group = 1)

# Hourly counts (jittered, semi-transparent, colored by count) with the mean
# per hour overlaid as large points joined by a line.
even_hours <- seq(0, 23, 2)  # label every second hour only

g <- ggplot() +
  geom_point(data = hourly1,
             aes(x = HOUR, y = n, color = n),
             position = position_jitter(width = .25),
             alpha = 0.25) +
  geom_point(data = hourly2,
             aes(x = HOUR, y = mean, group = group),
             size = 5) +
  geom_line(data = hourly2,
            aes(x = HOUR, y = mean, group = group)) +
  theme_graphzoo(base_size = 15) +
  # "none" replaces the deprecated `color = FALSE` way of hiding a legend.
  guides(color = "none") +
  scale_color_gradient2(low = cbf[3], mid = cbf[2], high = cbf[6], midpoint = 12.5) +
  xlab(NULL) + ylab(NULL) +
  scale_x_discrete(breaks = even_hours, labels = paste0(even_hours, ":00"))

g <- addTitle(g, "Manhattan's hourly vehicular collision rate (Jan-Dec 2014)", n.lines = 2)

g <- addBanner(g, font.size = 4, heights = c(1, 0.05 * 8 / 6),
               l.txt = "GRAPHZOO.TUMBLR.COM",
               r.txt = "SOURCE: DATA.CITYOFNEWYORK.US")

g

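Figure 2: Number of vehicular collisions per hour of the day in Manhattan (Jan-Dec 2014). Small points are individual date-hours; large points joined by a line are the hourly means.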

Back to top


2.4 Most dangerous crossings

# Collision count and mean coordinates per crossing, most frequent first.
# A crossing is labelled "<street> - <street>" with the alphabetically larger
# name first, so that A/B and B/A collapse into a single label.
# ifelse() and paste0() are vectorized, so no rowwise() pass is needed.
cross <- collisions %>%
  mutate(cross = ifelse(ON.STREET.NAME > CROSS.STREET.NAME,
                        paste0(ON.STREET.NAME, " - ", CROSS.STREET.NAME),
                        paste0(CROSS.STREET.NAME, " - ", ON.STREET.NAME))) %>%
  # Drop records without street names: both names empty gives " - ", and a
  # missing (NA) label is discarded by filter() as well.
  filter(cross != " - ") %>%
  group_by(cross) %>%
  # na.rm = TRUE: one record without coordinates must not turn the whole
  # crossing's position into NA.
  summarize(n = n(),
            lat = mean(LATITUDE, na.rm = TRUE),
            long = mean(LONGITUDE, na.rm = TRUE)) %>%
  arrange(desc(n)) %>%
  mutate(cross = capwords(tolower(cross)))
## Warning: Grouping rowwise data frame strips rowwise nature
# Horizontal bar chart of the 50 crossings with the most collisions.
# head() is used instead of cross[1:50, ] so that fewer than 50 crossings do
# not introduce all-NA padding rows.
g <- ggplot(head(cross, 50), aes(x = reorder(factor(cross), n), y = n)) +
  geom_bar(stat = "identity", color = "white", fill = cbf[2]) +
  coord_flip() +
  theme_graphzoo(base_size = 14) +
  # "none" replaces the deprecated `color = FALSE` form.
  guides(color = "none") +
  theme(axis.text.y = element_text(size = 8)) +
  xlab(NULL) + ylab("Number of vehicular collisions")

g <- addTitle(g, "Manhattan's 50 most dangerous crossings (Jan-Dec 2014)", n.lines = 1, font.size = 15)

g <- addBanner(g, font.size = 3, heights = c(1, 0.05 * 6 / 9),
               l.txt = "GRAPHZOO.TUMBLR.COM",
               r.txt = "SOURCE: DATA.CITYOFNEWYORK.US")

# Inset map: the polygon with id "2" of the borough shapefile (presumably
# Manhattan -- confirm against the shapefile) with the 50 most frequent
# crossings overlaid as translucent white points, on a rotated map projection.
m <- ggplot() +
  geom_polygon(data = filter(fortify(nyc_map), id == "2"),
               aes(x = long, y = lat, group = group),
               color = "grey50", size = 1, fill = "grey50") +
  # head() instead of cross[1:50, ] avoids NA padding rows when there are
  # fewer than 50 crossings.
  geom_point(data = head(cross, 50), aes(x = long, y = lat),
             color = "white", alpha = 0.5) +
  coord_map(orientation = c(68.5, 0, 50)) +
  theme_graphzoo() +
  # Transparent background and no axes or grid, so the map can be drawn on top
  # of another plot.
  theme(plot.background = element_rect(fill = rgb(0, 0, 0, 0), color = rgb(0, 0, 0, 0)),
        axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        # grid:: makes the origin of unit() explicit; grid is never attached
        # by this script itself.
        plot.margin = grid::unit(c(-.02, -.13, -0.06, -.2), "snpc"))
## Regions defined for each Polygons
# Draw the bar chart first, then the inset map inside a viewport positioned
# over its lower-right area.
vp <- grid::viewport(x = 0.86, y = 0.41, width = 0.6, height = 0.6)
print(g)
print(m, vp = vp)

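Figure 3: The 50 Manhattan crossings with the most vehicular collisions (Jan-Dec 2014), with an inset map showing their locations.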

Back to top


2.5 Map

# Project the borough outline and the crossing coordinates with the same
# rotated projection, so both can be drawn on plain Cartesian axes.
# The projection is named explicitly: without it mapproject() falls back on
# whichever projection was used last in the session. "mercator" is the
# coord_map() default used for the inset map of the previous figure.
map <- filter(fortify(nyc_map), id == "2")
## Regions defined for each Polygons
tmp <- mapproject(map$long, map$lat,
                  projection = "mercator", orientation = c(68.5, 0, 50))
map <- mutate(map, long_rot = tmp$x, lat_rot = tmp$y)

tmp <- mapproject(cross$long, cross$lat,
                  projection = "mercator", orientation = c(68.5, 0, 50))
cross <- mutate(cross, long_rot = tmp$x, lat_rot = tmp$y)

# Map of every crossing on the projected borough outline; point color, fill
# and size all encode the number of collisions.
# The three scales share one set of breaks/labels and an empty title so that
# they can be shown as a single legend.
legend_breaks <- c(25, 50, 75, 100, 125)

g <- ggplot() +
  geom_polygon(data = map,
               aes(x = long_rot, y = lat_rot, group = group),
               color = "gray15", size = 1, fill = "gray15") +
  geom_point(data = cross,
             aes(x = long_rot, y = lat_rot, color = n, fill = n, size = n),
             alpha = 0.5) +
  # Coordinates are already projected, so only a fixed aspect ratio is needed.
  coord_fixed() +
  scale_color_gradient("", low = "red", high = "yellow",
                       breaks = legend_breaks, labels = legend_breaks) +
  scale_fill_gradient("", low = "red", high = "yellow",
                      breaks = legend_breaks, labels = legend_breaks) +
  scale_size_continuous("", breaks = legend_breaks, labels = legend_breaks) +
  # color = "none" replaces the deprecated `color = FALSE`.
  guides(fill = guide_legend(nrow = 1, override.aes = list(shape = 21, alpha = 0.75)),
         color = "none") +
  theme_graphzoo(base_size = 6) +
  # Transparent background and no axes or grid: this plot is printed on top of
  # another one.
  theme(plot.background = element_rect(fill = rgb(0, 0, 0, 0), color = rgb(0, 0, 0, 0)),
        legend.justification = c(1, 1), legend.position = c(1.6, 0.9),
        legend.key.width = grid::unit(1, "line"),
        axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.margin = grid::unit(c(0, 0, -0.05, 0), "snpc"))

# Backdrop for the map: an empty unit-square plot that carries only the
# right-aligned title and subtitle, plus the banner added at the bottom.
unit_square <- data.frame(x = c(0, 1), y = c(0, 1))

headline <- annotate("text", x = 1, y = 0.99,
                     label = "A year of accidents",
                     hjust = 1, family = "Avenir Next", size = 5.5)
subheadline <- annotate("text", x = 1, y = 0.95,
                        label = "Locations and number of vehicular collisions\nManhattan, NYC (Jan-Dec 2014)",
                        hjust = 1, family = "Avenir Next", size = 2.25)

# Strip axes and grid lines so that only the text remains visible.
bare_theme <- theme(axis.line = element_blank(),
                    axis.text = element_blank(),
                    axis.ticks = element_blank(),
                    axis.title = element_blank(),
                    panel.grid.major = element_blank(),
                    panel.grid.minor = element_blank(),
                    plot.margin = grid::unit(c(0, 0, -0.05, 0), "snpc"))

b <- ggplot(unit_square, aes(x = x, y = y)) +
  geom_blank() +
  headline +
  subheadline +
  theme_graphzoo() +
  bare_theme

b <- addBanner(b, font.size = 2.5, heights = c(1, 0.05 * 4 / 8),
               l.txt = "GRAPHZOO.TUMBLR.COM",
               r.txt = "SOURCE: DATA.CITYOFNEWYORK.US")


# Draw the backdrop first, then the map in a full-size viewport shifted
# slightly left of and above the page center.
vp <- grid::viewport(x = 0.45, y = 0.52, width = 1, height = 1)
print(b)
print(g, vp = vp)

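Figure 4: Locations and number of vehicular collisions at Manhattan crossings (Jan-Dec 2014). Point size and color both scale with the number of collisions.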

Back to top